library(data.table)
library(tidyverse)
library(Seurat)
library(patchwork)
library(reactable)
# set your working directory
setwd("~/Documents/HGEN_663/extra/lec8")
We’ll start with pre-computed Seurat objects.
# import MacCarroll dataset
Seurat_object_MacCarroll <- readRDS("Seurat_object_MacCarroll.rds")
Seurat_object_MacCarroll[["Dataset"]] <- "MacCarroll"
# import Joyal dataset
Seurat_object_Joyal <- readRDS("Seurat_object_Joyal.rds")
Seurat_object_Joyal[["Dataset"]] <- "Joyal"
# merge the 2 dataset
Seurat_object <- merge(Seurat_object_MacCarroll,
y = Seurat_object_Joyal,
add.cell.ids = NULL,
project = "Lab_integration")
# before proceeding, we note markers for cell cycle
Seurat_object <- CellCycleScoring(Seurat_object, set.ident = FALSE,
s.features = cc.genes$s.genes,
g2m.features = cc.genes$g2m.genes)
# we then go on to apply a transformation to the data and regresses out unwanted variation
vs <- c("nFeature_RNA", "percent.mito", "S.Score", "G2M.Score")
Seurat_object.list <- SplitObject(Seurat_object, split.by = "Dataset") %>%
lapply(SCTransform, verbose = F, vars.to.regress = vs)
# select most consistently variable features
Seurat_object.features <- SelectIntegrationFeatures(object.list = Seurat_object.list,
nfeatures = 3000)
# some data wrangling
Seurat_object.list <- PrepSCTIntegration(object.list = Seurat_object.list,
anchor.features = Seurat_object.features)
# identify anchors shared by the datasets
Seurat_object.anchors <- FindIntegrationAnchors(object.list = Seurat_object.list,
normalization.method = "SCT",
anchor.features = Seurat_object.features)
# proceed with integration
Seurat_object <- IntegrateData(anchorset = Seurat_object.anchors,
normalization.method = "SCT")
Seurat_object <- RunPCA(Seurat_object) %>%
RunUMAP(dims = 1:20) %>%
RunTSNE(dims = 1:20)
Seurat_object <- FindNeighbors(Seurat_object,
dims = 1:2,
reduction = "umap")
Seurat_object <- FindClusters(Seurat_object,
resolution = 0.5,
reduction = "umap")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8393
## Number of edges: 182804
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9519
## Number of communities: 30
## Elapsed time: 0 seconds
DimPlot(Seurat_object, reduction = "umap", label=TRUE)
DimPlot(Seurat_object, reduction = "umap", label=TRUE, split.by="Dataset")
DimPlot(Seurat_object, reduction = "tsne", label=TRUE)
DimPlot(Seurat_object, reduction = "tsne", label=TRUE, split.by="Dataset")
We could again identify markers genes as before
DefaultAssay(Seurat_object) <- "integrated"
Seurat_object.markers <- FindAllMarkers(Seurat_object, only.pos = TRUE,
min.pct = 0.25, logfc.threshold = 0.25)
head(Seurat_object.markers) %>% reactable()
top10 <- Seurat_object.markers %>%
group_by(cluster) %>%
slice_max(avg_log2FC, n = 10) %>%
ungroup()
DotPlot(
Seurat_object,
assay = NULL,
unique(top10$gene),
cols = c("blue", "red"),
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 6,
group.by = NULL,
split.by = NULL,
scale.by = "radius",
scale.min = NA,
scale.max = NA
) + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))
Or again adopt known markers
markers.retina.dotplot <- toupper(c(
"Rho", #Rods
"Opn1mw", #Cones
"Trpm1", #Bipolar cells
"Scgn", #Bipolar cells
"Celf4", #Amacrine cells
"Rgs5", #Pericytes
"Cldn5", #Endothelial cells
"Lyz2", #Immune cells
"Glul", # Muller glia
"Gfap", #Astrocytes
"Optc", #Lens cells
"Lhx1" #Horizontal cells
))
DotPlot(
Seurat_object,
assay = NULL,
markers.retina.dotplot,
cols = c("blue", "red"),
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 6,
group.by = NULL,
split.by = NULL,
scale.by = "radius",
scale.min = NA,
scale.max = NA
) + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))
Or make use of pre-existing labels annotated independently
DimPlot(Seurat_object, reduction = "umap", label = TRUE,
pt.size = 0.5, group.by="Cell_Type") + NoLegend()
Activity 3: Assign as many cluster labels as you can based on the provided information
Seurat_object <- SetIdent(Seurat_object, cells = NULL, value="seurat_clusters")
new.cluster.ids <- c("", # cluster 0
"", # cluster 1
"", # cluster 2
"", # cluster 3
"", # cluster 4
"", # cluster 5
"", # cluster 6
"", # cluster 7
"", # cluster 8
"", # cluster 9
"", # cluster 10
"", # cluster 11
"", # cluster 12
"", # cluster 13
"", # cluster 14
"", # cluster 15
"", # cluster 16
"", # cluster 17
"", # cluster 18
"", # cluster 19
"", # cluster 20
"", # cluster 21
"", # cluster 22
"", # cluster 23
"", # cluster 24
"", # cluster 25
"", # cluster 26
"", # cluster 27
"" # cluster 28
)
names(new.cluster.ids) <- levels(Seurat_object)
Seurat_object <- RenameIdents(Seurat_object, new.cluster.ids)
Seurat_object[["Cell_Type"]] <- Idents(object = Seurat_object)
DimPlot(Seurat_object, reduction = "umap", label = T)